bitkeeper revision 1.1236.1.184 (424d4a00y8MNt89B4nCZ8LKcrTcZUw)
author: kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
Fri, 1 Apr 2005 13:17:52 +0000 (13:17 +0000)
committer: kaf24@firebug.cl.cam.ac.uk <kaf24@firebug.cl.cam.ac.uk>
Fri, 1 Apr 2005 13:17:52 +0000 (13:17 +0000)
Fix multi-VCPU TLB shootdown interface -- specify pointer to VCPU
bitmap, so it is read at time of flush and not before (which might be
too early, before all updates are flushed, leading to races). Also
add selective multi-VCPU shootdown capability to update_va_mapping()
and use this to make ptep_set_access_flags() a single hypercall.
Signed-off-by: Keir Fraser <keir@xensource.com>
12 files changed:
freebsd-5.3-xen-sparse/i386-xen/i386-xen/xen_machdep.c
freebsd-5.3-xen-sparse/i386-xen/xen/netfront/xn_netfront.c
linux-2.4.29-xen-sparse/mm/memory.c
linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c
linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c
linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c
linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c
linux-2.6.11-xen-sparse/drivers/xen/usbback/usbback.c
linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h
netbsd-2.0-xen-sparse/sys/arch/xen/xen/if_xennet.c
xen/arch/x86/mm.c
xen/include/public/xen.h

index 4fa020f531e7d55631eb56339bc9290440ab55c3..1dcd9448d3ef44dbfd1b350a7138806933d92e99 100644 (file)
@@ -540,7 +540,7 @@ mcl_queue_pt_update(vm_offset_t va, vm_paddr_t ma)
     MCL_QUEUE[MCL_IDX].op = __HYPERVISOR_update_va_mapping;
     MCL_QUEUE[MCL_IDX].args[0] = (unsigned long)va;
     MCL_QUEUE[MCL_IDX].args[1] = (unsigned long)ma;
-    MCL_QUEUE[MCL_IDX].args[2] = UVMF_INVLPG_LOCAL;
+    MCL_QUEUE[MCL_IDX].args[2] = UVMF_INVLPG|UVMF_LOCAL;
     mcl_increment_idx();
 }
 
index 40d9e4636ebd691e57bbcdfe9756376a7055ced5..1de71545fb500b5f0352dec0bd31ed03cfe277f5 100644 (file)
@@ -440,7 +440,7 @@ xn_alloc_rx_buffers(struct xn_softc *sc)
     PT_UPDATES_FLUSH();
 
     /* After all PTEs have been zapped we blow away stale TLB entries. */
-    xn_rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH_LOCAL;
+    xn_rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL;
 
     /* Give away a batch of pages. */
     xn_rx_mcl[i].op = __HYPERVISOR_dom_mem_op;
index 883a2928ab095f4deb08c12baf6610e32eac8733..875e5745c4bad6429c0a14aa0257202abc1e970e 100644 (file)
@@ -911,7 +911,7 @@ static inline void establish_pte(struct vm_area_struct * vma, unsigned long addr
 {
 #ifdef CONFIG_XEN
        if ( likely(vma->vm_mm == current->mm) ) {
-               HYPERVISOR_update_va_mapping(address, entry, UVMF_INVLPG_LOCAL);
+               HYPERVISOR_update_va_mapping(address, entry, UVMF_INVLPG|UVMF_LOCAL);
        } else {
                set_pte(page_table, entry);
                flush_tlb_page(vma, address);
index 82493c1236f1ce59c61174234c788945af1a24bc..d34fd71f8ec6b53c552be06514afbb107fc692a7 100644 (file)
@@ -108,7 +108,7 @@ void xen_tlb_flush_mask(cpumask_t mask)
 {
     struct mmuext_op op;
     op.cmd = MMUEXT_TLB_FLUSH_MULTI;
-    op.cpuset = mask.bits[0];
+    op.cpuset = (unsigned long)mask.bits;
     BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
@@ -124,7 +124,7 @@ void xen_invlpg_mask(cpumask_t mask, unsigned long ptr)
 {
     struct mmuext_op op;
     op.cmd = MMUEXT_INVLPG_MULTI;
-    op.cpuset = mask.bits[0];
+    op.cpuset = (unsigned long)mask.bits;
     op.linear_addr = ptr & PAGE_MASK;
     BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
index 4c3c8a5d06b176b1fa265e209d628d5722f71a34..a827ab4eda8c0d92bf0a06ff7e2de855eefb187e 100644 (file)
@@ -111,7 +111,7 @@ static void fast_flush_area(int idx, int nr_pages)
         mcl[i].args[2] = 0;
     }
 
-    mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH_ALL;
+    mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
     if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
         BUG();
 }
index 9079ea2d47372d47392ff273a5726b914356360a..d08c296a028c8ab4cf492e8f7b8953a03e0b1db5 100644 (file)
@@ -270,7 +270,7 @@ static void net_rx_action(unsigned long unused)
     mcl->args[3] = DOMID_SELF;
     mcl++;
 
-    mcl[-3].args[2] = UVMF_TLB_FLUSH_ALL;
+    mcl[-3].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
     if ( unlikely(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0) )
         BUG();
 
@@ -429,7 +429,7 @@ static void net_tx_action(unsigned long unused)
         mcl++;     
     }
 
-    mcl[-1].args[2] = UVMF_TLB_FLUSH_ALL;
+    mcl[-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
     if ( unlikely(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0) )
         BUG();
 
index 0ac6747900560958202b50024dcab8ba9c5e8de5..f72929502171400023305f13b7a23196b0ab258b 100644 (file)
@@ -388,7 +388,7 @@ static void network_alloc_rx_buffers(struct net_device *dev)
     }
 
     /* After all PTEs have been zapped we blow away stale TLB entries. */
-    rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH_ALL;
+    rx_mcl[i-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
 
     /* Give away a batch of pages. */
     rx_mcl[i].op = __HYPERVISOR_dom_mem_op;
index 72a6be3a2f5b62dfddce82325fb76799fcb0889d..42439405cd800a89611e687a6dce23d324725ba8 100644 (file)
@@ -195,7 +195,7 @@ static void fast_flush_area(int idx, int nr_pages)
         mcl[i].args[2] = 0;
     }
 
-    mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH_ALL;
+    mcl[nr_pages-1].args[2] = UVMF_TLB_FLUSH|UVMF_ALL;
     if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
         BUG();
 }
index 5333fde72bcc09690d1097536d68b9eaec476dd7..5afb8ced1f805087f7044dab68e0e9d8f647bd93 100644 (file)
@@ -407,8 +407,7 @@ extern void noexec_setup(const char *str);
        do {                                                              \
                if (__dirty) {                                            \
                        if ( likely((__vma)->vm_mm == current->mm) ) {    \
-                           HYPERVISOR_update_va_mapping((__address), (__entry), 0); \
-                           flush_tlb_page((__vma), (__address));         \
+                           HYPERVISOR_update_va_mapping((__address), (__entry), UVMF_INVLPG|UVMF_MULTI|(unsigned long)((__vma)->vm_mm->cpu_vm_mask.bits)); \
                        } else {                                          \
                             xen_l1_entry_update((__ptep), (__entry).pte_low); \
                            flush_tlb_page((__vma), (__address));         \
index 687b21080bc1675f09bb6716b3f9c642ae98333b..2ffe6da2a0c4d16fd084311cf8db0f74a1e6a431 100644 (file)
@@ -598,7 +598,7 @@ xennet_rx_push_buffer(struct xennet_softc *sc, int id)
        xpq_flush_queue();
 
        /* After all PTEs have been zapped we blow away stale TLB entries. */
-       rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH_LOCAL;
+       rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL;
 
        /* Give away a batch of pages. */
        rx_mcl[nr_pfns].op = __HYPERVISOR_dom_mem_op;
@@ -681,7 +681,7 @@ xen_network_handler(void *arg)
                mcl->op = __HYPERVISOR_update_va_mapping;
                mcl->args[0] = sc->sc_rx_bufa[rx->id].xb_rx.xbrx_va;
                mcl->args[1] = (rx->addr & PG_FRAME) | PG_V|PG_KW;
-               mcl->args[2] = UVMF_TLB_FLUSH_LOCAL; // 0;
+               mcl->args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL; // 0;
                mcl++;
 
                xpmap_phys_to_machine_mapping
@@ -898,7 +898,7 @@ network_alloc_rx_buffers(struct xennet_softc *sc)
        xpq_flush_queue();
 
        /* After all PTEs have been zapped we blow away stale TLB entries. */
-       rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH_LOCAL;
+       rx_mcl[nr_pfns-1].args[2] = UVMF_TLB_FLUSH|UVMF_LOCAL;
 
        /* Give away a batch of pages. */
        rx_mcl[nr_pfns].op = __HYPERVISOR_dom_mem_op;
index 845fbc9f66ecada394001fef55c9a6543c46ba3f..d1eb5650b8fd92f10c4f306b22df69cd7cf4b6e3 100644 (file)
@@ -1329,6 +1329,25 @@ static int set_foreigndom(unsigned int cpu, domid_t domid)
     return okay;
 }
 
+static inline unsigned long vcpuset_to_pcpuset(
+    struct domain *d, unsigned long vset)
+{
+    unsigned int  vcpu;
+    unsigned long pset = 0;
+    struct exec_domain *ed;
+
+    while ( vset != 0 )
+    {
+        vcpu = find_first_set_bit(vset);
+        vset &= ~(1UL << vcpu);
+        if ( (vcpu < MAX_VIRT_CPUS) &&
+             ((ed = d->exec_domain[vcpu]) != NULL) )
+            pset |= 1UL << ed->processor;
+    }
+
+    return pset;
+}
+
 int do_mmuext_op(
     struct mmuext_op *uops,
     unsigned int count,
@@ -1478,19 +1497,17 @@ int do_mmuext_op(
         case MMUEXT_TLB_FLUSH_MULTI:
         case MMUEXT_INVLPG_MULTI:
         {
-            unsigned long inset = op.cpuset, outset = 0;
-            while ( inset != 0 )
+            unsigned long vset, pset;
+            if ( unlikely(get_user(vset, (unsigned long *)op.cpuset)) )
             {
-                unsigned int vcpu = find_first_set_bit(inset);
-                inset &= ~(1UL<<vcpu);
-                if ( (vcpu < MAX_VIRT_CPUS) &&
-                     ((ed = d->exec_domain[vcpu]) != NULL) )
-                    outset |= 1UL << ed->processor;
+                okay = 0;
+                break;
             }
+            pset = vcpuset_to_pcpuset(d, vset);
             if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI )
-                flush_tlb_mask(outset & d->cpuset);
+                flush_tlb_mask(pset & d->cpuset);
             else
-                flush_tlb_one_mask(outset & d->cpuset, op.linear_addr);
+                flush_tlb_one_mask(pset & d->cpuset, op.linear_addr);
             break;
         }
 
@@ -1999,6 +2016,7 @@ int do_update_va_mapping(unsigned long va,
     struct exec_domain *ed  = current;
     struct domain      *d   = ed->domain;
     unsigned int        cpu = ed->processor;
+    unsigned long       vset, pset, bmap_ptr;
     int                 rc = 0;
 
     perfc_incrc(calls_to_update_va);
@@ -2013,11 +2031,6 @@ int do_update_va_mapping(unsigned long va,
 
     cleanup_writable_pagetable(d);
 
-    /*
-     * XXX When we make this support 4MB superpages we should also deal with 
-     * the case of updating L2 entries.
-     */
-
     if ( unlikely(!mod_l1_entry(&linear_pg_table[l1_linear_offset(va)],
                                 mk_l1_pgentry(val))) )
         rc = -EINVAL;
@@ -2025,21 +2038,42 @@ int do_update_va_mapping(unsigned long va,
     if ( unlikely(shadow_mode_enabled(d)) )
         update_shadow_va_mapping(va, val, ed, d);
 
-    switch ( flags & UVMF_FLUSH_MASK )
+    switch ( flags & UVMF_FLUSHTYPE_MASK )
     {
-    case UVMF_TLB_FLUSH_LOCAL:
-        local_flush_tlb();
-        percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB;
-        break;
-    case UVMF_TLB_FLUSH_ALL:
-        flush_tlb_mask(d->cpuset);
-        percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB;
-        break;
-    case UVMF_INVLPG_LOCAL:
-        local_flush_tlb_one(va);
+    case UVMF_TLB_FLUSH:
+        switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
+        {
+        case UVMF_LOCAL:
+            local_flush_tlb();
+            break;
+        case UVMF_ALL:
+            flush_tlb_mask(d->cpuset);
+            break;
+        default:
+            if ( unlikely(get_user(vset, (unsigned long *)bmap_ptr)) )
+                rc = -EFAULT;
+            pset = vcpuset_to_pcpuset(d, vset);
+            flush_tlb_mask(pset & d->cpuset);
+            break;
+        }
         break;
-    case UVMF_INVLPG_ALL:
-        flush_tlb_one_mask(d->cpuset, va);
+
+    case UVMF_INVLPG:
+        switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
+        {
+        case UVMF_LOCAL:
+            local_flush_tlb_one(va);
+            break;
+        case UVMF_ALL:
+            flush_tlb_one_mask(d->cpuset, va);
+            break;
+        default:
+            if ( unlikely(get_user(vset, (unsigned long *)bmap_ptr)) )
+                rc = -EFAULT;
+            pset = vcpuset_to_pcpuset(d, vset);
+            flush_tlb_one_mask(pset & d->cpuset, va);
+            break;
+        }
         break;
     }
 
index 0ec17675c25d98faf977fc7700cf1fd0495af490..43a2e87e02c2cb513e3179b839c9ac4e4f2b41aa 100644 (file)
  * linear_addr: Linear address to be flushed from the local TLB.
  * 
  * cmd: MMUEXT_TLB_FLUSH_MULTI
- * cpuset: Set of VCPUs to be flushed.
+ * cpuset: Pointer to bitmap of VCPUs to be flushed.
  * 
  * cmd: MMUEXT_INVLPG_MULTI
  * linear_addr: Linear address to be flushed.
- * cpuset: Set of VCPUs to be flushed.
+ * cpuset: Pointer to bitmap of VCPUs to be flushed.
  * 
  * cmd: MMUEXT_TLB_FLUSH_ALL
  * No additional arguments. Flushes all VCPUs' TLBs.
@@ -188,17 +188,21 @@ struct mmuext_op {
         /* SET_LDT */
         unsigned int nr_ents;
         /* TLB_FLUSH_MULTI, INVLPG_MULTI */
-        unsigned long cpuset;
+        void *cpuset;
     };
 };
 #endif
 
 /* These are passed as 'flags' to update_va_mapping. They can be ORed. */
-#define UVMF_TLB_FLUSH_LOCAL    1 /* Flush local CPU's TLB.          */
-#define UVMF_INVLPG_LOCAL       2 /* Flush VA from local CPU's TLB.  */
-#define UVMF_TLB_FLUSH_ALL      3 /* Flush all TLBs.                 */
-#define UVMF_INVLPG_ALL         4 /* Flush VA from all TLBs.         */
-#define UVMF_FLUSH_MASK         7
+/* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap.   */
+/* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer.         */
+#define UVMF_NONE               (0UL)    /* No flushing at all.   */
+#define UVMF_TLB_FLUSH          (1UL<<0) /* Flush entire TLB(s).  */
+#define UVMF_INVLPG             (2UL<<0) /* Flush only one entry. */
+#define UVMF_FLUSHTYPE_MASK     (3UL<<0)
+#define UVMF_MULTI              (0UL<<1) /* Flush subset of TLBs. */
+#define UVMF_LOCAL              (0UL<<2) /* Flush local TLB.      */
+#define UVMF_ALL                (1UL<<2) /* Flush all TLBs.       */
 
 /*
  * Commands to HYPERVISOR_sched_op().